#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')
library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr)
library(biomaRt)
library(anRichment) ; library(BrainDiseaseCollection)
suppressWarnings(suppressMessages(library(WGCNA)))
SFARI_colour_hue = function(r) {
pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}
Load preprocessed dataset (preprocessing code in 19_10_14_data_preprocessing.Rmd) and clustering (pipeline in 19_10_21_WGCNA.Rmd)
# Gandal dataset
load('./../Data/preprocessed_data_imputed.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>%
mutate('ID'=as.character(ensembl_gene_id)) %>%
dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
mutate('Neuronal'=1)
# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_08-29-2019_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]
# Clusterings
clusterings = read_csv('./../Data/clusters_imputed.csv')
# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>%
mutate(`gene-score`=ifelse(is.na(`gene-score`), 'None', `gene-score`)) %>%
left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
mutate(gene.score=ifelse(`gene-score`=='None' & Neuronal==1, 'Neuronal', `gene-score`),
significant=padj<0.05 & !is.na(padj))
# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)
genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))
clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]
dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '_imputed.csv'))
dataset$Module = dataset[,clustering_selected]
rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds)
Using the hetcor function, that calculates Pearson, polyserial or polychoric correlations depending on the type of variables involved.
datTraits = datMeta %>% dplyr::select(Diagnosis, Region, Sex, Age, PMI, RNAExtractionBatch) %>%
dplyr::rename('ExtractionBatch' = RNAExtractionBatch)
# Recalculate MEs with color labels
ME_object = datExpr %>% t %>% moduleEigengenes(colors = genes_info$Module)
MEs = orderMEs(ME_object$eigengenes)
# Calculate correlation between eigengenes and the traits and their p-values
moduleTraitCor = MEs %>% apply(2, function(x) hetcor(x, datTraits)$correlations[1,-1]) %>% t
rownames(moduleTraitCor) = colnames(MEs)
colnames(moduleTraitCor) = colnames(datTraits)
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nrow(datExpr))
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
# In case there are any NAs
if(sum(!complete.cases(moduleTraitCor))>0){
print(paste0(sum(is.na(moduleTraitCor)),' correlation(s) could not be calculated'))
}
## [1] "1 correlation(s) could not be calculated"
rm(ME_object)
Note: The correlations between Modules and Diagonsis that cannot be calculated, weirdly enough, is because the initial correlation is too high, so it would be a very bad thing to lose these modules because of this numerical error. I’m going to fill in the values using the polyserial function, which doesn’t give exactly the same results as the hetcor() function, but it’s quite similar.
# Calculate the correlation tha failed with hetcor()
missing_modules = rownames(moduleTraitCor)[is.na(moduleTraitCor[,1])]
for(m in missing_modules){
cat(paste0('Correcting Module-Diagnosis correlation for Module ', gsub('ME','',m)))
moduleTraitCor[m,'Diagnosis'] = polyserial(MEs[,m], datTraits$Diagnosis)
}
## Correcting Module-Diagnosis correlation for Module #00C0B5
rm(missing_modules)
I’m going to select all the modules that have an absolute correlation higher than 0.9 with Diagnosis to study them
# Sort moduleTraitCor by Diagnosis
moduleTraitCor = moduleTraitCor[order(moduleTraitCor[,1], decreasing=TRUE),]
moduleTraitPvalue = moduleTraitPvalue[order(moduleTraitCor[,1], decreasing=TRUE),]
# Create text matrix for the Heatmap
textMatrix = paste0(signif(moduleTraitCor, 2), ' (', signif(moduleTraitPvalue, 1), ')')
dim(textMatrix) = dim(moduleTraitCor)
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = gsub('ME','',rownames(moduleTraitCor)),
yColorWidth=0, colors = brewer.pal(11,'PiYG'), bg.lab.y = gsub('ME','',rownames(moduleTraitCor)),
textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.8, cex.lab.y = 0.75, zlim = c(-1,1),
main = paste('Module-Trait relationships'))
diagnosis_cor = data.frame('Module' = gsub('ME','',rownames(moduleTraitCor)),
'MTcor' = moduleTraitCor[,'Diagnosis'],
'MTpval' = moduleTraitPvalue[,'Diagnosis'])
genes_info = genes_info %>% left_join(diagnosis_cor, by='Module')
rm(moduleTraitPvalue, datTraits, textMatrix, diagnosis_cor)
The modules consist mainly of points with very high (absolute) values in PC2 (which we know is related to lfc), so this result is consistent with the high correlation between Module and Diagnosis, although some of the points with the highest PC2 values do not belong to these top modules
top_modules = gsub('ME','',rownames(moduleTraitCor)[abs(moduleTraitCor[,'Diagnosis'])>0.9])
cat(paste0('Top modules selected: ', paste(top_modules, collapse=', '),'\n'))
## Top modules selected: #00C0B5, #E46DF5
pca = datExpr %>% prcomp
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
gene_id = paste0(ID, ' (', external_gene_id, ')'))
table(plot_data$ImportantModules)
##
## #00C0B5 #E46DF5 Others
## 1341 1011 13795
ggplotly(plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) +
geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
ggtitle('Modules with strongest relation to Diagnosis'))
rm(pca)
create_plot = function(module){
plot_data = dataset %>% dplyr::select(ID, paste0('MM.',gsub('#','',module)), GS, gene.score) %>% filter(dataset$Module==module)
colnames(plot_data)[2] = 'Module'
SFARI_colors = as.numeric(names(table(as.character(plot_data$gene.score)[plot_data$gene.score!='None'])))
p = ggplotly(plot_data %>% ggplot(aes(Module, GS, color=gene.score)) + geom_point(alpha=0.5, aes(ID=ID)) + ylab('Gene Significance') +
scale_color_manual(values=SFARI_colour_hue(r=c(SFARI_colors,8))) + theme_minimal() + xlab('Module Membership') +
ggtitle(paste0('Module ', module,' (MTcor = ', round(moduleTraitCor[paste0('ME',module),1],2),')')))
return(p)
}
create_plot(top_modules[1])
create_plot(top_modules[2])
rm(create_plot)
List of top SFARI Genes in top modules ordered by SFARI score and Gene Significance
table_data = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, GS, gene.score, Module) %>% arrange(gene.score, desc(abs(GS))) %>%
dplyr::rename('Ensembl ID'=ID, 'Gene Symbol'=external_gene_id,
'SFARI score'=gene.score, 'Gene Significance'=GS)
kable(table_data %>% filter(Module == top_modules[1] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[1]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000110066 | SUV420H1 | 0.6677451 | 1 |
| ENSG00000141431 | ASXL3 | 0.5523087 | 1 |
| ENSG00000189056 | RELN | -0.0965191 | 1 |
| ENSG00000108510 | MED13 | 0.7613289 | 2 |
| ENSG00000038382 | TRIO | 0.6348694 | 2 |
| ENSG00000117139 | KDM5B | 0.6007663 | 2 |
| ENSG00000181722 | ZBTB20 | 0.8748406 | 3 |
| ENSG00000168769 | TET2 | 0.8167971 | 3 |
| ENSG00000181090 | EHMT1 | 0.8046105 | 3 |
| ENSG00000162946 | DISC1 | 0.6680125 | 3 |
| ENSG00000083168 | KAT6A | 0.6183379 | 3 |
| ENSG00000205581 | HMGN1 | 0.5859493 | 3 |
| ENSG00000166833 | NAV2 | 0.4475125 | 3 |
| ENSG00000149571 | KIRREL3 | 0.4276545 | 3 |
| ENSG00000197724 | PHF2 | 0.4062299 | 3 |
| ENSG00000103257 | SLC7A5 | 0.3830543 | 3 |
| ENSG00000145020 | AMT | 0.3789058 | 3 |
| ENSG00000196839 | ADA | 0.3711328 | 3 |
| ENSG00000101004 | NINL | 0.3483560 | 3 |
| ENSG00000134698 | AGO4 | 0.2741164 | 3 |
| ENSG00000128573 | FOXP2 | 0.2472601 | 3 |
| ENSG00000118432 | CNR1 | 0.2121820 | 3 |
| ENSG00000165995 | CACNB2 | 0.1659124 | 3 |
| ENSG00000112902 | SEMA5A | 0.1492722 | 3 |
kable(table_data %>% filter(Module == top_modules[2] & `SFARI score` %in% c(1,2,3)) %>% dplyr::select(-Module),
caption=paste0('Top SFARI Genes for Module ', top_modules[2]))
| Ensembl ID | Gene Symbol | Gene Significance | SFARI score |
|---|---|---|---|
| ENSG00000136535 | TBR1 | -0.6421845 | 1 |
| ENSG00000136531 | SCN2A | -0.5865435 | 1 |
| ENSG00000174469 | CNTNAP2 | -0.7276510 | 2 |
| ENSG00000155974 | GRIP1 | -0.6495467 | 2 |
| ENSG00000157445 | CACNA2D3 | -0.3055170 | 2 |
| ENSG00000144619 | CNTN4 | -0.2873307 | 2 |
| ENSG00000074590 | NUAK1 | -0.8334792 | 3 |
| ENSG00000144285 | SCN1A | -0.8124748 | 3 |
| ENSG00000196876 | SCN8A | -0.7956552 | 3 |
| ENSG00000170579 | DLGAP1 | -0.7856950 | 3 |
| ENSG00000132294 | EFR3A | -0.7769346 | 3 |
| ENSG00000005955 | GGNBP2 | -0.7735918 | 3 |
| ENSG00000078328 | RBFOX1 | -0.7616138 | 3 |
| ENSG00000197535 | MYO5A | -0.7372328 | 3 |
| ENSG00000157087 | ATP2B2 | -0.7189717 | 3 |
| ENSG00000182621 | PLCB1 | -0.7144580 | 3 |
| ENSG00000175497 | DPP10 | -0.7073016 | 3 |
| ENSG00000021645 | NRXN3 | -0.6827653 | 3 |
| ENSG00000003147 | ICA1 | -0.6806178 | 3 |
| ENSG00000170745 | KCNS3 | -0.5681164 | 3 |
| ENSG00000183454 | GRIN2A | -0.5680448 | 3 |
| ENSG00000185345 | PARK2 | -0.4978854 | 3 |
| ENSG00000050030 | KIAA2022 | -0.4407621 | 3 |
| ENSG00000185008 | ROBO2 | -0.3077378 | 3 |
| ENSG00000182256 | GABRG3 | -0.2590592 | 3 |
| ENSG00000149970 | CNKSR2 | 0.1124385 | 3 |
| ENSG00000160305 | DIP2A | 0.0578090 | 3 |
| ENSG00000171723 | GPHN | -0.0362838 | 3 |
| ENSG00000166147 | FBN1 | 0.0110186 | 3 |
Modules with the strongest module-diagnosis correlation should have the highest percentage of SFARI Genes, but this doesn’t seem to be the case (even the opposite may be true)
plot_data = dataset %>% mutate('hasSFARIscore' = gene.score!='None') %>%
group_by(Module, MTcor, hasSFARIscore) %>% summarise(p=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(n=n()), by='Module') %>%
mutate(p=round(p/n*100,2))
for(i in 1:nrow(plot_data)){
this_row = plot_data[i,]
if(this_row$hasSFARIscore==FALSE & this_row$p==100){
new_row = this_row
new_row$hasSFARIscore = TRUE
new_row$p = 0
plot_data = plot_data %>% rbind(new_row)
}
}
plot_data = plot_data %>% filter(hasSFARIscore==TRUE)
ggplotly(plot_data %>% ggplot(aes(MTcor, p, size=n)) + geom_smooth(color='gray', se=FALSE) +
geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) + geom_hline(yintercept=mean(plot_data$p), color='gray') +
xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
theme_minimal() + theme(legend.position = 'none'))
rm(i, this_row, new_row, plot_data)
Breaking the SFARI genes by score
Perhaps there aren’t enough genes in each score for these plots to be reliable
scores = c(1,2,3,4,5,6,'None')
plot_data = dataset %>% group_by(Module, MTcor, gene.score) %>% summarise(n=n()) %>%
left_join(dataset %>% group_by(Module) %>% summarise(N=n()), by='Module') %>%
mutate(p=round(n/N*100,2), gene.score = as.character(gene.score))
for(i in 1:nrow(plot_data)){
this_row = plot_data[i,]
if(sum(plot_data$Module == this_row$Module)<7){
missing_scores = which(! scores %in% plot_data$gene.score[plot_data$Module == this_row$Module])
for(s in missing_scores){
new_row = this_row
new_row$gene.score = as.character(s)
new_row$n = 0
new_row$p = 0
plot_data = plot_data %>% rbind(new_row)
}
}
}
plot_data = plot_data %>% filter(gene.score != 'None')
plot_function = function(i){
i = 2*i-1
plot_list = list()
for(j in 1:2){
plot_list[[j]] = ggplotly(plot_data %>% filter(gene.score==scores[i+j-1]) %>% ggplot(aes(MTcor, p, size=n)) +
geom_smooth(color='gray', se=FALSE) +
geom_point(color=plot_data$Module[plot_data$gene.score==scores[i+j-1]], alpha=0.5, aes(id=Module)) +
geom_hline(yintercept=mean(plot_data$p[plot_data$gene.score==scores[i+j-1]]), color='gray') +
xlab('Module-Diagnosis correlation') + ylab('% of SFARI genes') +
theme_minimal() + theme(legend.position = 'none'))
}
p = subplot(plot_list, nrows=1) %>% layout(annotations = list(
list(x = 0.2 , y = 1.05, text = paste0('SFARI score ', scores[i]), showarrow = F, xref='paper', yref='paper'),
list(x = 0.8 , y = 1.05, text = paste0('SFARI score ', scores[i+1]), showarrow = F, xref='paper', yref='paper')))
return(p)
}
plot_function(1)
plot_function(2)
plot_function(3)
rm(i, s, this_row, new_row, plot_function)
Since these modules have the strongest relation to autism, this pattern should be reflected in their model eigengenes, having two different behaviours for the samples corresponding to autism and the ones corresponding to control.
In both cases, the Eigengenes separate the behaviour between autism and control samples very clearly!
plot_EGs = function(module){
plot_data = data.frame('ID' = rownames(MEs), 'MEs' = MEs[,paste0('ME',module)], 'Diagnosis' = datMeta$Diagnosis)
p = plot_data %>% ggplot(aes(Diagnosis, MEs, fill=Diagnosis)) + geom_boxplot() + theme_minimal() + theme(legend.position='none') +
ggtitle(paste0('Module ', module, ' (MTcor=',round(moduleTraitCor[paste0('ME',module),1],2),')'))
return(p)
}
p1 = plot_EGs(top_modules[1])
p2 = plot_EGs(top_modules[2])
grid.arrange(p1, p2, nrow=1)
rm(plot_EGs, p1, p2)
Selecting the modules with the highest correlation to Diagnosis, and, from them, the genes with the highest module membership-(absolute) gene significance
*Ordered by \(\frac{MM+|GS|}{2}\)
There aren’t that many SFARI genes in the top genes of the modules and not a single one belonging to scores 1 and 2
create_table = function(module){
top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>%
mutate(importance = (MM+abs(GS))/2) %>% arrange(by=-importance) %>% top_n(20)
return(top_genes)
}
top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])
kable(top_genes[[1]], caption=paste0('Top 10 genes for module ', top_modules[1], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[1]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000143384 | MCL1 | 0.8552824 | 0.9032620 | None | 0.8792722 |
| ENSG00000003402 | CFLAR | 0.8911964 | 0.8305399 | None | 0.8608681 |
| ENSG00000181722 | ZBTB20 | 0.8304805 | 0.8748406 | 3 | 0.8526606 |
| ENSG00000089159 | PXN | 0.7949400 | 0.8685202 | None | 0.8317301 |
| ENSG00000158615 | PPP1R15B | 0.8294061 | 0.8322220 | None | 0.8308141 |
| ENSG00000184384 | MAML2 | 0.7956773 | 0.8559612 | None | 0.8258192 |
| ENSG00000196935 | SRGAP1 | 0.7227821 | 0.9258820 | None | 0.8243321 |
| ENSG00000161638 | ITGA5 | 0.7907543 | 0.8384614 | None | 0.8146079 |
| ENSG00000196576 | PLXNB2 | 0.7798962 | 0.8458053 | None | 0.8128508 |
| ENSG00000120278 | PLEKHG1 | 0.8021478 | 0.8151698 | None | 0.8086588 |
| ENSG00000138119 | MYOF | 0.7678768 | 0.8442134 | None | 0.8060451 |
| ENSG00000150457 | LATS2 | 0.7581871 | 0.8437052 | None | 0.8009462 |
| ENSG00000124782 | RREB1 | 0.7566820 | 0.8375656 | None | 0.7971238 |
| ENSG00000172493 | AFF1 | 0.7590609 | 0.8347661 | None | 0.7969135 |
| ENSG00000102531 | FNDC3A | 0.7941594 | 0.7992886 | None | 0.7967240 |
| ENSG00000253731 | PCDHGA6 | 0.7661406 | 0.8119576 | None | 0.7890491 |
| ENSG00000133812 | SBF2 | 0.7728464 | 0.8050416 | None | 0.7889440 |
| ENSG00000072364 | AFF4 | 0.7841051 | 0.7901701 | 6 | 0.7871376 |
| ENSG00000148841 | ITPRIP | 0.7670328 | 0.8067253 | None | 0.7868791 |
| ENSG00000168209 | DDIT4 | 0.7439992 | 0.8279055 | None | 0.7859523 |
kable(top_genes[[2]], caption=paste0('Top 10 genes for module ', top_modules[2], ' (MTcor = ',
round(moduleTraitCor[paste0('ME',top_modules[2]),1],2),')'))
| ID | external_gene_id | MM | GS | gene.score | importance |
|---|---|---|---|---|---|
| ENSG00000050748 | MAPK9 | 0.8990191 | -0.9501362 | None | 0.9245776 |
| ENSG00000177432 | NAP1L5 | 0.8829939 | -0.9182444 | None | 0.9006191 |
| ENSG00000138078 | PREPL | 0.8757652 | -0.9145463 | None | 0.8951558 |
| ENSG00000155097 | ATP6V1C1 | 0.8612938 | -0.9176117 | None | 0.8894527 |
| ENSG00000125814 | NAPB | 0.8940304 | -0.8843363 | None | 0.8891833 |
| ENSG00000163577 | EIF5A2 | 0.8546741 | -0.9233279 | None | 0.8890010 |
| ENSG00000128881 | TTBK2 | 0.9059408 | -0.8626445 | None | 0.8842926 |
| ENSG00000171132 | PRKCE | 0.8634904 | -0.9045455 | None | 0.8840180 |
| ENSG00000176490 | DIRAS1 | 0.8594598 | -0.9057699 | None | 0.8826149 |
| ENSG00000114573 | ATP6V1A | 0.8088574 | -0.9496685 | None | 0.8792630 |
| ENSG00000131437 | KIF3A | 0.8643867 | -0.8914761 | None | 0.8779314 |
| ENSG00000196876 | SCN8A | 0.9494013 | -0.7956552 | 3 | 0.8725282 |
| ENSG00000111674 | ENO2 | 0.8638465 | -0.8786881 | None | 0.8712673 |
| ENSG00000172348 | RCAN2 | 0.8131262 | -0.9275533 | None | 0.8703397 |
| ENSG00000130540 | SULT4A1 | 0.8737602 | -0.8601999 | None | 0.8669801 |
| ENSG00000184368 | MAP7D2 | 0.7777380 | -0.9482718 | None | 0.8630049 |
| ENSG00000065609 | SNAP91 | 0.8971523 | -0.8179428 | None | 0.8575475 |
| ENSG00000022355 | GABRA1 | 0.9111891 | -0.8010918 | 5 | 0.8561405 |
| ENSG00000144285 | SCN1A | 0.8919455 | -0.8124748 | 3 | 0.8522102 |
| ENSG00000198825 | INPP5F | 0.8379959 | -0.8652389 | None | 0.8516174 |
rm(create_table)
pca = datExpr %>% prcomp
ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)
plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
mutate(alpha = ifelse(color %in% top_modules &
ID %in% ids, 1, 0.1))
plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) +
theme_minimal() + ggtitle('Important genes identified through WGCNA')
Level of expression by Diagnosis for top genes, ordered by importance (defined above)
create_plot = function(i){
plot_data = datExpr[rownames(datExpr) %in% top_genes[[i]]$ID,] %>% mutate('ID' = rownames(.)) %>%
melt(id.vars='ID') %>% mutate(variable = gsub('X','',variable)) %>%
left_join(top_genes[[i]], by='ID') %>%
left_join(datMeta %>% dplyr::select(Dissected_Sample_ID, Diagnosis),
by = c('variable'='Dissected_Sample_ID')) %>% arrange(desc(importance))
p = ggplotly(plot_data %>% mutate(external_gene_id=factor(external_gene_id,
levels=unique(plot_data$external_gene_id), ordered=T)) %>%
ggplot(aes(external_gene_id, value, fill=Diagnosis)) + geom_boxplot() + theme_minimal() +
xlab(paste0('Top genes for module ', top_modules[i], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[i]][1],2), ')')) + ylab('Level of Expression') +
theme(axis.text.x = element_text(angle = 90, hjust = 1)))
return(p)
}
create_plot(1)
create_plot(2)
rm(create_plot)
Using the package anRichment
It was designed by Peter Langfelder explicitly to perform enrichmen analysis on WGCNA’s modules in brain-related experiments (mainly Huntington’s Disease)
It has packages with brain annotations:
BrainDiseaseCollection: A Brain Disease Gene Set Collection for anRichment
MillerAIBSCollection: (included in anRichment) Contains gene sets collected by Jeremy A. Miller at AIBS of various cell type and brain region marker sets, gene sets collected from expression studies of developing brain, as well as a collection of transcription factor (TF) targets from the original ChEA study
The tutorial says it’s an experimental package
It’s not on CRAN nor in Bioconductor
# Prepare dataset
# Create dataset with top modules membership and removing the genes without an assigned module
EA_dataset = data.frame('ensembl_gene_id' = genes_info$ID,
module = ifelse(genes_info$Module %in% top_modules, genes_info$Module, 'other')) %>%
filter(genes_info$Module!='gray')
# Assign Entrez Gene Id to each gene
getinfo = c('ensembl_gene_id','entrezgene')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl', host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=EA_dataset$ensembl_gene_id, mart=mart)
## Batch submitting query [=>-----------------------------] 6% eta: 7s Batch
## submitting query [==>----------------------------] 9% eta: 7s Batch submitting
## query [===>---------------------------] 12% eta: 7s Batch submitting
## query [====>--------------------------] 15% eta: 6s Batch submitting
## query [=====>-------------------------] 18% eta: 6s Batch submitting
## query [======>------------------------] 21% eta: 6s Batch submitting
## query [=======>-----------------------] 24% eta: 6s Batch submitting
## query [=======>-----------------------] 27% eta: 5s Batch submitting
## query [========>----------------------] 30% eta: 5s Batch submitting
## query [=========>---------------------] 33% eta: 5s Batch submitting
## query [==========>--------------------] 36% eta: 5s Batch submitting
## query [===========>-------------------] 39% eta: 5s Batch submitting
## query [============>------------------] 42% eta: 5s Batch submitting
## query [=============>-----------------] 45% eta: 4s Batch submitting
## query [==============>----------------] 48% eta: 4s Batch submitting
## query [===============>---------------] 52% eta: 4s Batch submitting
## query [================>--------------] 55% eta: 4s Batch submitting
## query [=================>-------------] 58% eta: 3s Batch submitting
## query [==================>------------] 61% eta: 3s Batch submitting
## query [===================>-----------] 64% eta: 3s Batch submitting
## query [====================>----------] 67% eta: 3s Batch submitting
## query [=====================>---------] 70% eta: 2s Batch submitting
## query [======================>--------] 73% eta: 2s Batch submitting
## query [======================>--------] 76% eta: 2s Batch submitting
## query [=======================>-------] 79% eta: 2s Batch submitting
## query [========================>------] 82% eta: 1s Batch submitting
## query [=========================>-----] 85% eta: 1s Batch submitting
## query [==========================>----] 88% eta: 1s Batch submitting
## query [===========================>---] 91% eta: 1s Batch submitting
## query [============================>--] 94% eta: 0s Batch submitting
## query [=============================>-] 97% eta: 0s Batch submitting query
## [===============================] 100% eta: 0s
EA_dataset = EA_dataset %>% left_join(biomart_output, by='ensembl_gene_id')
for(tm in top_modules){
cat(paste0('\n',sum(EA_dataset$module==tm & is.na(EA_dataset$entrezgene)), ' genes from top module ',
tm, ' don\'t have an Entrez Gene ID'))
}
##
## 34 genes from top module #00C0B5 don't have an Entrez Gene ID
## 14 genes from top module #E46DF5 don't have an Entrez Gene ID
rm(getinfo, mart, biomart_output, tm)
# Manual: https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/GeneAnnotation/Tutorials/anRichment-Tutorial1.pdf
collectGarbage()
# EA_dataset = rbind(EA_dataset[EA_dataset$module!='other',], EA_dataset[EA_dataset$module=='other',][sample(sum(EA_dataset$module=='other'), 1000),])
# Prepare datasets
GO_col = buildGOcollection(organism = 'human', verbose = 0)
## Loading required package: org.Hs.eg.db
##
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
internal_col = internalCollection(organism = 'human')
MillerAIBS_col = MillerAIBSCollection(organism = 'human')
BrainDisease_col = BrainDiseaseCollection(organism = 'human')
combined_col = mergeCollections(GO_col, internal_col, MillerAIBS_col, BrainDisease_col)
# Print collections used
cat('Using collections: ')
## Using collections:
knownGroups(combined_col, sortBy = 'size')
## [1] "GO"
## [2] "GO.BP"
## [3] "GO.MF"
## [4] "GO.CC"
## [5] "JA Miller at AIBS"
## [6] "Chip-X enrichment analysis (ChEA)"
## [7] "Brain"
## [8] "JAM"
## [9] "Prenatal brain"
## [10] "Brain region markers"
## [11] "Cortex"
## [12] "Brain region marker enriched gene sets"
## [13] "WGCNA"
## [14] "BrainRegionMarkers"
## [15] "BrainRegionMarkers.HBA"
## [16] "BrainRegionMarkers.HBA.localMarker(top200)"
## [17] "Postnatal brain"
## [18] "ImmunePathways"
## [19] "Markers of cortex layers"
## [20] "BrainLists"
## [21] "Cell type markers"
## [22] "Germinal brain"
## [23] "BrainRegionMarkers.HBA.globalMarker(top200)"
## [24] "Accelerated evolution"
## [25] "Postmitotic brain"
## [26] "BrainLists.Blalock_AD"
## [27] "BrainLists.DiseaseGenes"
## [28] "BloodAtlases"
## [29] "Verge Disease Genes"
## [30] "BloodAtlases.Whitney"
## [31] "BrainLists.JAXdiseaseGene"
## [32] "BrainLists.MO"
## [33] "Age-associated genes"
## [34] "BrainLists.Lu_Aging"
## [35] "Cell type marker enriched gene sets"
## [36] "BrainLists.CA1vsCA3"
## [37] "BrainLists.MitochondrialType"
## [38] "BrainLists.MO.2+_26Mar08"
## [39] "BrainLists.MO.Sugino"
## [40] "BloodAtlases.Gnatenko2"
## [41] "BloodAtlases.Kabanova"
## [42] "BrainLists.Voineagu"
## [43] "StemCellLists"
## [44] "StemCellLists.Lee"
# Perform Enrichment Analysis
enrichment = enrichmentAnalysis(classLabels = EA_dataset$module, identifiers = EA_dataset$entrezgene,
refCollection = combined_col, #useBackground = 'given',
threshold = 1e-4, thresholdType = 'Bonferroni',
getOverlapEntrez = FALSE, getOverlapSymbols = TRUE)
## enrichmentAnalysis: preparing data..
## ..working on label set 1 ..
kable(enrichment$enrichmentTable %>% filter(class==top_modules[1]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, desc(enrichmentRatio)),
caption = paste0('Enriched terms for module ', top_modules[1], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[1]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| GO:0006351 | transcription, DNA-templated | GO|GO.BP | 4.50e-06 | 0e+00 | 1.370702 | 1299 | 2952 | 334 |
| GO:0006355 | regulation of transcription, DNA-templated | GO|GO.BP | 6.60e-06 | 1e-07 | 1.381922 | 1299 | 2779 | 317 |
| GO:0097659 | nucleic acid-templated transcription | GO|GO.BP | 8.00e-06 | 1e-07 | 1.361793 | 1299 | 2998 | 337 |
| GO:1903506 | regulation of nucleic acid-templated transcription | GO|GO.BP | 9.10e-06 | 1e-07 | 1.373568 | 1299 | 2840 | 322 |
| GO:2001141 | regulation of RNA biosynthetic process | GO|GO.BP | 1.22e-05 | 1e-07 | 1.370191 | 1299 | 2847 | 322 |
| GO:0032774 | RNA biosynthetic process | GO|GO.BP | 1.37e-05 | 1e-07 | 1.355913 | 1299 | 3011 | 337 |
| GO:2000112 | regulation of cellular macromolecule biosynthetic process | GO|GO.BP | 4.12e-05 | 3e-07 | 1.335813 | 1299 | 3147 | 347 |
| GO:0051252 | regulation of RNA metabolic process | GO|GO.BP | 6.45e-05 | 5e-07 | 1.339434 | 1299 | 3039 | 336 |
| GO:0010556 | regulation of macromolecule biosynthetic process | GO|GO.BP | 6.78e-05 | 5e-07 | 1.323019 | 1299 | 3269 | 357 |
| GO:0098609 | cell-cell adhesion | GO|GO.BP | 7.90e-05 | 6e-07 | 1.837215 | 1299 | 666 | 101 |
kable(enrichment$enrichmentTable %>% filter(class==top_modules[2]) %>%
dplyr::select(dataSetID, shortDataSetName, inGroups, Bonferroni, FDR, enrichmentRatio,
effectiveClassSize, effectiveSetSize, nCommonGenes) %>%
arrange(Bonferroni, enrichmentRatio),
caption = paste0('Enriched terms for module ', top_modules[2], ' (MTcor = ',
round(genes_info$MTcor[genes_info$Module==top_modules[2]][1],4), ')'))
| dataSetID | shortDataSetName | inGroups | Bonferroni | FDR | enrichmentRatio | effectiveClassSize | effectiveSetSize | nCommonGenes |
|---|---|---|---|---|---|---|---|---|
| JAMiller.AIBS.000142 | Highest in CP of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 2.897111 | 986 | 1212 | 220 |
| JAMiller.AIBS.000052 | CortexWGCNA 15-21 post-conception weeks C26 | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 3.236118 | 986 | 725 | 147 |
| JAMiller.AIBS.000569 | WGCNA humanSpecificOlivedrab2Module frontalCtx FOXP2 | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 1.707653 | 986 | 4047 | 433 |
| JAMiller.AIBS.000150 | Highest in CP of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 2.526654 | 986 | 1276 | 202 |
| JAMiller.AIBS.000155 | Lowest in VZ of E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.00e+00 | 0e+00 | 2.214607 | 986 | 1672 | 232 |
| JAM:002744 | Autism_differential_expression_across_at_least_one_comparison | JAM|BrainLists|BrainLists.Voineagu | 0.00e+00 | 0e+00 | 2.882908 | 986 | 764 | 138 |
| JAMiller.AIBS.000506 | Genes bound by SUZ12 in MOUSE MESC from PMID 20075857 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 1.715973 | 986 | 3367 | 362 |
| JAM:003016 | downAD_synapticTransmission | JAM|BrainLists|BrainLists.Blalock_AD | 0.00e+00 | 0e+00 | 7.711227 | 986 | 89 | 43 |
| JAMiller.AIBS.000570 | WGCNA Olivedrab2ModuleGenes with enriched ELAVL2 targets | JA Miller at AIBS|Brain|Postnatal brain|Cortex|WGCNA | 0.00e+00 | 0e+00 | 3.311296 | 986 | 482 | 100 |
| GO:0045202 | synapse | GO|GO.CC | 0.00e+00 | 0e+00 | 2.415470 | 986 | 1044 | 158 |
| GO:0097458 | neuron part | GO|GO.CC | 0.00e+00 | 0e+00 | 2.161076 | 986 | 1418 | 192 |
| GO:0044456 | synapse part | GO|GO.CC | 0.00e+00 | 0e+00 | 2.585553 | 986 | 821 | 133 |
| JAMiller.AIBS.000141 | CP enriched in E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex|Postmitotic brain | 0.00e+00 | 0e+00 | 2.912081 | 986 | 570 | 104 |
| JAMiller.AIBS.000504 | Genes bound by SUZ12 in mouse MESC from PMID 18692474 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2.044713 | 986 | 1366 | 175 |
| JAMiller.AIBS.000123 | HippocampusWGCNA turquoise DGenriched upAge | JA Miller at AIBS|Brain|Postnatal brain|WGCNA | 0.00e+00 | 0e+00 | 2.174448 | 986 | 1101 | 150 |
| JAMiller.AIBS.000505 | Genes bound by SUZ12 in MOUSE MESC from PMID 18974828 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2.010855 | 986 | 1389 | 175 |
| GO:0007268 | chemical synaptic transmission | GO|GO.BP | 0.00e+00 | 0e+00 | 2.766866 | 986 | 548 | 95 |
| GO:0098916 | anterograde trans-synaptic signaling | GO|GO.BP | 0.00e+00 | 0e+00 | 2.766866 | 986 | 548 | 95 |
| GO:0099536 | synaptic signaling | GO|GO.BP | 0.00e+00 | 0e+00 | 2.745883 | 986 | 558 | 96 |
| GO:0099537 | trans-synaptic signaling | GO|GO.BP | 0.00e+00 | 0e+00 | 2.741849 | 986 | 553 | 95 |
| JAMiller.AIBS.000503 | Genes bound by SUZ12 in mouse MESC from PMID 18555785 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 2.399283 | 986 | 765 | 115 |
| GO:1990351 | transporter complex | GO|GO.CC | 0.00e+00 | 0e+00 | 3.732685 | 986 | 248 | 58 |
| GO:0043005 | neuron projection | GO|GO.CC | 0.00e+00 | 0e+00 | 2.156817 | 986 | 1036 | 140 |
| GO:1902495 | transmembrane transporter complex | GO|GO.CC | 0.00e+00 | 0e+00 | 3.743808 | 986 | 243 | 57 |
| GO:0034702 | ion channel complex | GO|GO.CC | 0.00e+00 | 0e+00 | 3.776356 | 986 | 224 | 53 |
| JAMiller.AIBS.000364 | Genes bound by MTF2 in MOUSE MESC from PMID 20144788 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 1.675637 | 986 | 2286 | 240 |
| JAMiller.AIBS.000463 | Genes bound by SMAD4 in HUMAN A2780 from PMID 21799915 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 1.696132 | 986 | 2089 | 222 |
| GO:0097060 | synaptic membrane | GO|GO.CC | 0.00e+00 | 0e+00 | 2.901899 | 986 | 374 | 68 |
| GO:0030424 | axon | GO|GO.CC | 0.00e+00 | 0e+00 | 2.586476 | 986 | 506 | 82 |
| JAM:002764 | downAging_mitochondria_synapse | JAM|BrainLists|BrainLists.Lu_Aging | 0.00e+00 | 0e+00 | 2.823771 | 986 | 390 | 69 |
| JAM:002805 | Cerebral Cortex | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 4.039372 | 986 | 162 | 41 |
| JAM:003072 | Tail of Caudate Nucleus_IN_Striatum | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 4.039372 | 986 | 162 | 41 |
| GO:0098794 | postsynapse | GO|GO.CC | 0.00e+00 | 0e+00 | 2.464481 | 986 | 544 | 84 |
| GO:0098793 | presynapse | GO|GO.CC | 0.00e+00 | 0e+00 | 2.685652 | 986 | 416 | 70 |
| GO:0034220 | ion transmembrane transport | GO|GO.BP | 0.00e+00 | 0e+00 | 2.066308 | 986 | 896 | 116 |
| GO:0034703 | cation channel complex | GO|GO.CC | 0.00e+00 | 0e+00 | 3.872061 | 986 | 169 | 41 |
| GO:0071944 | cell periphery | GO|GO.CC | 0.00e+00 | 0e+00 | 1.412039 | 986 | 3990 | 353 |
| JAMiller.AIBS.000584 | noncodingHumanAcceleratedRegions fromSeveralSources | JA Miller at AIBS|Accelerated evolution | 0.00e+00 | 0e+00 | 2.070643 | 986 | 871 | 113 |
| JAMiller.AIBS.000436 | Genes bound by REST in MOUSE MESC from PMID 21632747 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 0.00e+00 | 0e+00 | 1.715152 | 986 | 1675 | 180 |
| JAM:002991 | downAD_synapticTransmission | JAM|BrainLists|BrainLists.Blalock_AD | 0.00e+00 | 0e+00 | 4.936221 | 986 | 97 | 30 |
| JAM:002751 | Basal Pons | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 3.822861 | 986 | 167 | 40 |
| JAMiller.AIBS.000149 | Lowest in VZ of 13-16 post-conception weeks human | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 0.00e+00 | 0e+00 | 2.280064 | 986 | 616 | 88 |
| GO:0006811 | ion transport | GO|GO.BP | 0.00e+00 | 0e+00 | 1.812581 | 986 | 1312 | 149 |
| GO:0007610 | behavior | GO|GO.BP | 0.00e+00 | 0e+00 | 2.414449 | 986 | 509 | 77 |
| GO:0042995 | cell projection | GO|GO.CC | 0.00e+00 | 0e+00 | 1.665905 | 986 | 1782 | 186 |
| GO:0045211 | postsynaptic membrane | GO|GO.CC | 0.00e+00 | 0e+00 | 2.989059 | 986 | 283 | 53 |
| GO:0006812 | cation transport | GO|GO.BP | 0.00e+00 | 0e+00 | 2.008506 | 986 | 890 | 112 |
| GO:0120025 | plasma membrane bounded cell projection | GO|GO.CC | 0.00e+00 | 0e+00 | 1.676634 | 986 | 1723 | 181 |
| GO:0007267 | cell-cell signaling | GO|GO.BP | 0.00e+00 | 0e+00 | 1.792092 | 986 | 1327 | 149 |
| JAM:002882 | Hippocampal Formation | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.globalMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 0.00e+00 | 0e+00 | 3.683180 | 986 | 169 | 39 |
| GO:0030001 | metal ion transport | GO|GO.BP | 1.00e-07 | 0e+00 | 2.163734 | 986 | 686 | 93 |
| GO:0043269 | regulation of ion transport | GO|GO.BP | 1.00e-07 | 0e+00 | 2.308565 | 986 | 560 | 81 |
| JAMiller.AIBS.000490 | Genes bound by STAT3 in HUMAN U87 from PMID 23295773 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 1.00e-07 | 0e+00 | 1.523326 | 986 | 2546 | 243 |
| GO:0005886 | plasma membrane | GO|GO.CC | 1.00e-07 | 0e+00 | 1.390880 | 986 | 3913 | 341 |
| JAM:002824 | Dentate Nucleus_IN_Cerebellar Nucleus | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.00e-07 | 0e+00 | 3.675739 | 986 | 165 | 38 |
| JAM:002769 | downAD_mitochondrion | JAM|BrainLists|BrainLists.Blalock_AD | 1.00e-07 | 0e+00 | 3.011405 | 986 | 265 | 50 |
| GO:0046873 | metal ion transmembrane transporter activity | GO|GO.MF | 1.00e-07 | 0e+00 | 2.896930 | 986 | 292 | 53 |
| JAMiller.AIBS.000106 | Genes enriched in the hippocampal SGZ in mouse | JA Miller at AIBS|Brain|Postnatal brain|Markers of cortex layers | 1.00e-07 | 0e+00 | 2.738775 | 986 | 338 | 58 |
| JAMiller.AIBS.000095 | Cortical PNOC neurons | JA Miller at AIBS|Brain|Postnatal brain|Cell type markers|Cortex | 1.00e-07 | 0e+00 | 1.383643 | 986 | 3945 | 342 |
| GO:0005216 | ion channel activity | GO|GO.MF | 3.00e-07 | 0e+00 | 2.907081 | 986 | 280 | 51 |
| GO:0015318 | inorganic molecular entity transmembrane transporter activity | GO|GO.MF | 3.00e-07 | 0e+00 | 2.319300 | 986 | 523 | 76 |
| GO:0050877 | nervous system process | GO|GO.BP | 3.00e-07 | 0e+00 | 2.002276 | 986 | 829 | 104 |
| JAMiller.AIBS.000518 | Genes bound by TCF4 in HUMAN U87 from PMID 23295773 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 3.00e-07 | 0e+00 | 1.452871 | 986 | 3021 | 275 |
| JAM:003054 | subiculum_IN_Hippocampal Formation | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 5.00e-07 | 0e+00 | 3.579009 | 986 | 165 | 37 |
| GO:0022838 | substrate-specific channel activity | GO|GO.MF | 6.00e-07 | 0e+00 | 2.846094 | 986 | 286 | 51 |
| JAM:003085 | trochlear nucleus_IN_Mesencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 7.00e-07 | 0e+00 | 3.699441 | 986 | 151 | 35 |
| GO:0042391 | regulation of membrane potential | GO|GO.BP | 7.00e-07 | 0e+00 | 2.684039 | 986 | 333 | 56 |
| GO:0098978 | glutamatergic synapse | GO|GO.CC | 8.00e-07 | 0e+00 | 2.644609 | 986 | 344 | 57 |
| JAM:002907 | inferior olivary complex_IN_Myelencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.00e-06 | 0e+00 | 3.494299 | 986 | 169 | 37 |
| GO:0007399 | nervous system development | GO|GO.BP | 1.00e-06 | 0e+00 | 1.573789 | 986 | 2008 | 198 |
| GO:0022890 | inorganic cation transmembrane transporter activity | GO|GO.MF | 1.30e-06 | 0e+00 | 2.540124 | 986 | 377 | 60 |
| GO:0008324 | cation transmembrane transporter activity | GO|GO.MF | 1.70e-06 | 0e+00 | 2.458455 | 986 | 409 | 63 |
| JAMiller.AIBS.000005 | CPi markers at 21 post-conception weeks | JA Miller at AIBS|Brain|Prenatal brain|Cortex|Markers of cortex layers|Postmitotic brain | 1.90e-06 | 0e+00 | 2.702568 | 986 | 313 | 53 |
| GO:0050804 | modulation of chemical synaptic transmission | GO|GO.BP | 2.00e-06 | 0e+00 | 2.538184 | 986 | 371 | 59 |
| GO:0099177 | regulation of trans-synaptic signaling | GO|GO.BP | 2.20e-06 | 0e+00 | 2.531361 | 986 | 372 | 59 |
| GO:0034765 | regulation of ion transmembrane transport | GO|GO.BP | 2.30e-06 | 0e+00 | 2.461561 | 986 | 402 | 62 |
| JAM:002920 | Lateral Nucleus_IN_Amygdala | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 3.10e-06 | 0e+00 | 3.440575 | 986 | 167 | 36 |
| JAM:003058 | Substantia Nigra, pars compacta_IN_Mesencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 3.10e-06 | 0e+00 | 3.440575 | 986 | 167 | 36 |
| GO:0055085 | transmembrane transport | GO|GO.BP | 3.10e-06 | 0e+00 | 1.743685 | 986 | 1254 | 137 |
| GO:0023061 | signal release | GO|GO.BP | 3.50e-06 | 0e+00 | 2.504432 | 986 | 376 | 59 |
| JAMiller.AIBS.000218 | Genes bound by AR in HUMAN PROSTATE CANCER from PMID 22383394 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 3.90e-06 | 0e+00 | 1.669277 | 986 | 1482 | 155 |
| GO:0022803 | passive transmembrane transporter activity | GO|GO.MF | 4.20e-06 | 0e+00 | 2.677236 | 986 | 310 | 52 |
| GO:0003008 | system process | GO|GO.BP | 4.30e-06 | 0e+00 | 1.680633 | 986 | 1434 | 151 |
| GO:0005261 | cation channel activity | GO|GO.MF | 5.50e-06 | 1e-07 | 3.033207 | 986 | 221 | 42 |
| JAMiller.AIBS.000140 | SZ/IZ/CP enriched in E14.5 mouse | JA Miller at AIBS|Brain|Prenatal brain|Markers of cortex layers|Cortex | 7.10e-06 | 1e-07 | 3.956008 | 986 | 117 | 29 |
| GO:0051049 | regulation of transport | GO|GO.BP | 7.20e-06 | 1e-07 | 1.642531 | 986 | 1545 | 159 |
| GO:0015075 | ion transmembrane transporter activity | GO|GO.MF | 7.50e-06 | 1e-07 | 2.156060 | 986 | 570 | 77 |
| JAMiller.AIBS.000071 | GerminalZonesWGCNA 15-21 post-conception weeks G3 IntermediateProgenitors | JA Miller at AIBS|Brain|Prenatal brain|Cortex|WGCNA|Germinal brain | 1.02e-05 | 1e-07 | 2.586861 | 986 | 327 | 53 |
| GO:0036477 | somatodendritic compartment | GO|GO.CC | 1.03e-05 | 1e-07 | 2.020891 | 986 | 695 | 88 |
| GO:0015267 | channel activity | GO|GO.MF | 1.17e-05 | 1e-07 | 2.634248 | 986 | 309 | 51 |
| GO:0015077 | monovalent inorganic cation transmembrane transporter activity | GO|GO.MF | 1.51e-05 | 1e-07 | 2.988029 | 986 | 219 | 41 |
| JAM:002739 | arcuate nucleus of medulla_IN_Myelencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.56e-05 | 1e-07 | 3.325093 | 986 | 168 | 35 |
| JAM:002875 | downAD_ionAndCalciumTransport | JAM|BrainLists|BrainLists.Blalock_AD | 1.59e-05 | 1e-07 | 4.506479 | 986 | 85 | 24 |
| JAM:003105 | ventral tegmental area_IN_Mesencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 1.97e-05 | 2e-07 | 3.522305 | 986 | 145 | 32 |
| JAM:002918 | lateral medullary reticular group_IN_Myelencephalon | JAM|BrainRegionMarkers|BrainRegionMarkers.HBA|BrainRegionMarkers.HBA.localMarker(top200)|Brain region markers|Brain region marker enriched gene sets | 2.30e-05 | 2e-07 | 3.349723 | 986 | 162 | 34 |
| JAMiller.AIBS.000471 | Genes bound by SOX2 in HUMAN LN229 GBM from PMID 21211035 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 2.80e-05 | 2e-07 | 1.452078 | 986 | 2572 | 234 |
| GO:0023052 | signaling | GO|GO.BP | 2.98e-05 | 2e-07 | 1.286249 | 986 | 4951 | 399 |
| JAMiller.AIBS.000217 | Genes bound by AR in HUMAN PC3 from PMID 19668381 | JA Miller at AIBS|Chip-X enrichment analysis (ChEA) | 2.99e-05 | 2e-07 | 1.439057 | 986 | 2684 | 242 |
| GO:0034762 | regulation of transmembrane transport | GO|GO.BP | 3.05e-05 | 2e-07 | 2.246033 | 986 | 469 | 66 |
| GO:0098655 | cation transmembrane transport | GO|GO.BP | 3.23e-05 | 3e-07 | 2.045564 | 986 | 632 | 81 |
| GO:0022839 | ion gated channel activity | GO|GO.MF | 3.55e-05 | 3e-07 | 2.864695 | 986 | 234 | 42 |
| GO:0098660 | inorganic ion transmembrane transport | GO|GO.BP | 3.80e-05 | 3e-07 | 2.049495 | 986 | 623 | 80 |
| GO:0022836 | gated channel activity | GO|GO.MF | 4.07e-05 | 3e-07 | 2.852505 | 986 | 235 | 42 |
| GO:0098590 | plasma membrane region | GO|GO.CC | 4.08e-05 | 3e-07 | 1.785565 | 986 | 1019 | 114 |
| GO:0033267 | axon part | GO|GO.CC | 4.37e-05 | 3e-07 | 2.487952 | 986 | 340 | 53 |
| JAM:002763 | downAD_metalIonTransport_glycoprotein | JAM|BrainLists|BrainLists.Blalock_AD | 4.40e-05 | 3e-07 | 2.660074 | 986 | 282 | 47 |
| JAM:002997 | Polycomb_genes_(PGSTs)_repressed_by_SCs | JAM|StemCellLists|StemCellLists.Lee | 4.91e-05 | 4e-07 | 1.749090 | 986 | 1095 | 120 |
| GO:0044463 | cell projection part | GO|GO.CC | 5.43e-05 | 4e-07 | 1.700703 | 986 | 1220 | 130 |
| GO:0120038 | plasma membrane bounded cell projection part | GO|GO.CC | 5.43e-05 | 4e-07 | 1.700703 | 986 | 1220 | 130 |
| GO:0044459 | plasma membrane part | GO|GO.CC | 6.04e-05 | 4e-07 | 1.521192 | 986 | 1983 | 189 |
Save Enrichment Analysis results
save(enrichment, file='./../Data/enrichmentAnalysis_imputed.RData')
#load('./../Data/enrichmentAnalysis.RData')
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.8.2
## [2] BrainDiseaseCollection_1.00
## [3] anRichment_1.01-2
## [4] TxDb.Mmusculus.UCSC.mm10.knownGene_3.4.7
## [5] TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
## [6] GenomicFeatures_1.36.4
## [7] GenomicRanges_1.36.1
## [8] GenomeInfoDb_1.20.0
## [9] anRichmentMethods_0.90-1
## [10] WGCNA_1.69
## [11] fastcluster_1.1.25
## [12] dynamicTreeCut_1.63-1
## [13] GO.db_3.8.2
## [14] AnnotationDbi_1.46.1
## [15] IRanges_2.18.3
## [16] S4Vectors_0.22.1
## [17] Biobase_2.44.0
## [18] BiocGenerics_0.30.0
## [19] biomaRt_2.40.5
## [20] knitr_1.28
## [21] doParallel_1.0.15
## [22] iterators_1.0.12
## [23] foreach_1.5.0
## [24] polycor_0.7-10
## [25] expss_0.10.2
## [26] GGally_1.5.0
## [27] gridExtra_2.3
## [28] viridis_0.5.1
## [29] viridisLite_0.3.0
## [30] RColorBrewer_1.1-2
## [31] dendextend_1.13.4
## [32] plotly_4.9.2
## [33] glue_1.3.2
## [34] reshape2_1.4.3
## [35] forcats_0.5.0
## [36] stringr_1.4.0
## [37] dplyr_0.8.5
## [38] purrr_0.3.3
## [39] readr_1.3.1
## [40] tidyr_1.0.2
## [41] tibble_3.0.0
## [42] ggplot2_3.3.0
## [43] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.1.5
## [3] Hmisc_4.4-0 plyr_1.8.6
## [5] lazyeval_0.2.2 splines_3.6.3
## [7] crosstalk_1.1.0.1 BiocParallel_1.18.1
## [9] digest_0.6.25 htmltools_0.4.0
## [11] fansi_0.4.1 magrittr_1.5
## [13] checkmate_2.0.0 memoise_1.1.0
## [15] cluster_2.1.0 annotate_1.62.0
## [17] Biostrings_2.52.0 modelr_0.1.6
## [19] matrixStats_0.56.0 prettyunits_1.1.1
## [21] jpeg_0.1-8.1 colorspace_1.4-1
## [23] blob_1.2.1 rvest_0.3.5
## [25] haven_2.2.0 xfun_0.12
## [27] crayon_1.3.4 RCurl_1.98-1.1
## [29] jsonlite_1.6.1 genefilter_1.66.0
## [31] impute_1.58.0 survival_3.1-11
## [33] gtable_0.3.0 zlibbioc_1.30.0
## [35] XVector_0.24.0 DelayedArray_0.10.0
## [37] scales_1.1.0 mvtnorm_1.1-0
## [39] DBI_1.1.0 Rcpp_1.0.4
## [41] xtable_1.8-4 progress_1.2.2
## [43] htmlTable_1.13.3 foreign_0.8-75
## [45] bit_1.1-15.2 preprocessCore_1.46.0
## [47] Formula_1.2-3 htmlwidgets_1.5.1
## [49] httr_1.4.1 acepack_1.4.1
## [51] ellipsis_0.3.0 farver_2.0.3
## [53] pkgconfig_2.0.3 reshape_0.8.8
## [55] XML_3.99-0.3 nnet_7.3-13
## [57] dbplyr_1.4.2 locfit_1.5-9.4
## [59] labeling_0.3 tidyselect_1.0.0
## [61] rlang_0.4.5 munsell_0.5.0
## [63] cellranger_1.1.0 tools_3.6.3
## [65] cli_2.0.2 generics_0.0.2
## [67] RSQLite_2.2.0 broom_0.5.5
## [69] evaluate_0.14 yaml_2.2.1
## [71] bit64_0.9-7 fs_1.4.0
## [73] nlme_3.1-144 xml2_1.2.5
## [75] compiler_3.6.3 rstudioapi_0.11
## [77] curl_4.3 png_0.1-7
## [79] reprex_0.3.0 geneplotter_1.62.0
## [81] stringi_1.4.6 highr_0.8
## [83] lattice_0.20-40 Matrix_1.2-18
## [85] vctrs_0.2.4 pillar_1.4.3
## [87] lifecycle_0.2.0 data.table_1.12.8
## [89] bitops_1.0-6 rtracklayer_1.44.4
## [91] R6_2.4.1 latticeExtra_0.6-29
## [93] codetools_0.2-16 assertthat_0.2.1
## [95] SummarizedExperiment_1.14.1 DESeq2_1.24.0
## [97] withr_2.1.2 GenomicAlignments_1.20.1
## [99] Rsamtools_2.0.3 GenomeInfoDbData_1.2.1
## [101] mgcv_1.8-31 hms_0.5.3
## [103] grid_3.6.3 rpart_4.1-15
## [105] rmarkdown_2.1 lubridate_1.7.4
## [107] base64enc_0.1-3